library(tidyverse)
library(janitor)
library(here)
library(readr)
# Read the three input data sets. Paths are relative to the working directory.
salaries <- read_csv(file = "data/salaries.csv")
# beer.txt is semicolon-delimited; its first row is used as the header
# (readr's default for col_names).
beer <- read_delim(file = "data/beer.txt", delim = ";")
inmates <- read_tsv(file = "data/inmates.tsv")
# Keep only the beginning and current salary columns.
select(salaries, beginning_salary, current_salary)

# Rows where age is above 50 and current salary is above 20000.
# Comma-separated conditions in filter() are combined with a logical AND.
filter(salaries, age > 50, current_salary > 20000)

# Order rows from highest to lowest current salary.
arrange(salaries, desc(current_salary))
# Add the beginning salary multiplied by 100 as `beginning_salary_pence`.
# The new column is derived from `beginning_salary` (not `current_salary`),
# so that its contents match its name.
salaries %>%
  mutate(beginning_salary_pence = beginning_salary * 100)
# Row(s) with the largest age (n = 1 is the default; tied rows are all kept).
slice_max(salaries, order_by = age, n = 1)

# Row(s) with the smallest age.
slice_min(salaries, order_by = age, n = 1)

# Row(s) with the smallest current salary.
slice_min(salaries, order_by = current_salary, n = 1)

# Row(s) with the largest current salary.
slice_max(salaries, order_by = current_salary, n = 1)
# Rows where age is below 30.
filter(salaries, age < 30)

# Order rows by ascending educational level.
arrange(salaries, educational_level)

# Drop the gender, employment_category and is_white columns.
select(salaries, -c(gender, employment_category, is_white))

# Rows with work experience below 2 or educational level below 12.
filter(salaries, work_experience < 2 | educational_level < 12)

# Add the current salary divided by 1000 as `current_salary_k`.
mutate(salaries, current_salary_k = current_salary / 1000)

# Rows where gender is "female" and the category is "security officer".
filter(
  salaries,
  gender == "female",
  employment_category == "security officer"
)
# Mean current salary within each employment category.
# `.groups = "drop"` states the output grouping explicitly, so dplyr does not
# have to pick a default and report it in a message.
salaries %>%
  group_by(employment_category) %>%
  summarise(average_salary = mean(current_salary), .groups = "drop")

# Mean current salary over the rows where gender is "male".
salaries %>%
  filter(gender == "male") %>%
  summarise(average_male_salary = mean(current_salary))

# Number of rows for each gender / is_white combination.
# Dropping the groups returns a plain tibble instead of one that is still
# grouped by `gender`.
salaries %>%
  group_by(gender, is_white) %>%
  summarise(count_of_employees = n(), .groups = "drop")
# mutate(df, mean_age = mean(current_salary))
# You may need to use View() to see the whole data.
# Now use group_by() with mutate() and mean(). What do you see?
# Without grouping, mean() is taken over the whole column, so every row
# receives the same overall mean current salary.
salaries %>%
  mutate(mean_salary = mean(current_salary))

# With group_by(age), mean() is evaluated per age group, so each row receives
# the mean current salary of the rows sharing its age.
# ungroup() returns a plain tibble so the grouping does not carry over into
# any later step.
salaries %>%
  group_by(age) %>%
  mutate(mean_salary = mean(current_salary)) %>%
  ungroup()
# Mean current salary per educational level, restricted to levels of 16 or
# below, ordered from highest to lowest mean.
# `.groups = "drop"` states the output grouping explicitly.
salaries %>%
  filter(educational_level <= 16) %>%
  group_by(educational_level) %>%
  summarise(average_salary = mean(current_salary), .groups = "drop") %>%
  arrange(desc(average_salary))

# Mean current salary per employment category and gender, restricted to rows
# with work experience of at least 2.
# Dropping the groups returns a plain tibble instead of one that is still
# grouped by `employment_category`.
salaries %>%
  filter(work_experience >= 2) %>%
  group_by(employment_category, gender) %>%
  summarise(average_salary = mean(current_salary), .groups = "drop")

# Difference between mean and median current salary per employment category,
# ordered from the largest difference to the smallest.
salaries %>%
  group_by(employment_category) %>%
  summarise(
    mean_salary = mean(current_salary),
    median_salary = median(current_salary),
    .groups = "drop"
  ) %>%
  mutate(difference = mean_salary - median_salary) %>%
  arrange(desc(difference))
# Count the missing values in the carbohydrates column.
summarise(beer, missing_carb_values = sum(is.na(carbohydrates)))

# Rows whose brand is missing.
filter(beer, is.na(brand))
# Replace the gender labels "Male" / "Female" with "M" / "F".
mutate(inmates, gender = recode(gender, "Male" = "M", "Female" = "F"))

# Title-case race, then fold "Amer Ind" and "Asian" into "Other".
mutate(
  inmates,
  race = recode(str_to_title(race), "Amer Ind" = "Other", "Asian" = "Other")
)
# Label each row "High" when bond_amount exceeds 1000000, otherwise "Normal".
mutate(
  inmates,
  bond_level = if_else(
    condition = bond_amount > 1000000,
    true = "High",
    false = "Normal"
  )
)

# Same labelling, then count the rows labelled "High".
inmates %>%
  mutate(
    bond_level = if_else(
      condition = bond_amount > 1000000,
      true = "High",
      false = "Normal"
    )
  ) %>%
  filter(bond_level == "High") %>%
  summarise(high_bond_count = n())
# Title-case detainer, keep the three listed values as they are, and map
# every other value (including missing ones) to "Other".
# NOTE(review): the spelling "Imigration" presumably mirrors the raw data --
# confirm before correcting it.
inmates %>%
  mutate(
    detainer = str_to_title(detainer),
    detainer = if_else(
      detainer %in% c("None", "Imigration", "Federal"),
      detainer,
      "Other"
    )
  )